home *** CD-ROM | disk | FTP | other *** search
/*** analog 1.9beta ***/
/* Please read Readme.html, or http://www.statslab.cam.ac.uk/~sret1/analog/ */

/*** sscanf.c; functions to replace sscanf(), which is far too slow, in
     certain specific cases ***/
-
- #include "analhea2.h"
-
/*** Now the scanning routines ***/
-
- int sscanf_date(char *inputline, int *date, int *monthno, int *year, int *hr,
- int *min)
- { /* scanning date from common/agent/referer log */
-
- extern int strtomonth(); /* in utils.c */
-
- register char *cin = inputline;
- char *cout;
- char month[4];
- int i;
-
- if (!isdigit(*cin))
- return(0);
- else
- *date = 10 * (*cin - '0');
- cin++;
- if (!isdigit(*cin))
- return(0);
- else
- *date += (*cin - '0');
-
- /* read in month */
- cin++;
- if (*cin != '/')
- return(1);
- cin++;
- cout = month;
- for (i = 0; i < 3 && *cin != '\0'; i++) {
- *cout = *cin;
- cout++;
- cin++;
- }
- if (*cin == '\0')
- return(1);
- *cout = '\0';
- if ((*monthno = strtomonth(month)) == ERR)
- return(1);
-
- /* read in year */
- if (*cin != '/')
- return(2);
- cin++;
- if (!isdigit(*cin))
- return(2);
- else
- *year = 1000 * (*cin - '0');
- cin++;
- if (!isdigit(*cin))
- return(2);
- else
- *year += 100 * (*cin - '0');
- cin++;
- if (!isdigit(*cin)) {
- if (*cin != ':')
- return(2);
- else { /* allow two digit years for Spyglass server */
- *year /= 100;
- *year += 1900;
- if (*year < 1970)
- *year += 100;
- }
- }
- else {
- *year += 10 * (*cin - '0');
- cin++;
- if (!isdigit(*cin))
- return(2);
- else
- *year += (*cin - '0');
- cin++;
- if (*cin != ':')
- return(3);
- }
-
- /* read in hour */
- cin++;
- if (!isdigit(*cin))
- return(3);
- else
- *hr = 10 * (*cin - '0');
- cin++;
- if (!isdigit(*cin))
- return(3);
- else
- *hr += (*cin - '0');
-
- /* read in minute */
- cin++;
- if (*cin != ':')
- return(4);
- cin++;
- if (!isdigit(*cin))
- return(4);
- else
- *min = 10 * (*cin - '0');
- cin++;
- if (!isdigit(*cin))
- return(4);
- else
- *min += (*cin - '0');
-
- /* don't read in second, but check it for correct form */
- cin++;
- if (*cin != ':')
- return(4);
- cin++;
- if (!isdigit(*cin))
- return(4);
- cin++;
- if (!isdigit(*cin))
- return(4);
-
- return(5);
- }
-
- int sscanf_olddate(char *inputline, int *date, int *monthno, int *year,
- int *hr, int *min)
- { /* the same thing for NCSA old-style and error logs */
-
- extern int strtomonth(); /* in utils.c */
-
- register char *cin = inputline;
- char *cout;
- char month[4];
- int i;
-
- /* ignore day of week, so scan until next ' ' */
- for (cin++; *cin != ' ' && *cin != '\0'; cin++)
- ;
- if (*cin == '\0')
- return(0);
-
- /* read in month */
- cin++;
- cout = month;
- for (i = 0; i < 3 && *cin != '\0'; i++) {
- *cout = *cin;
- cout++;
- cin++;
- }
- if (*cin == '\0')
- return(0);
- *cout = '\0';
- if ((*monthno = strtomonth(month)) == ERR)
- return(1);
-
- /* read in date */
- if (*cin != ' ')
- return(1);
- cin++;
- if (!isdigit(*cin) && *cin != ' ')
- return(1);
- else if (*cin != ' ')
- *date = 10 * (*cin - '0');
- else
- *date = 0;
- cin++;
- if (!isdigit(*cin))
- return(1);
- else
- *date += (*cin - '0');
-
- /* read in hour */
- cin++;
- if (*cin != ' ')
- return(2);
- cin++;
- if (!isdigit(*cin))
- return(2);
- else
- *hr = 10 * (*cin - '0');
- cin++;
- if (!isdigit(*cin))
- return(2);
- else
- *hr += (*cin - '0');
-
- /* read in minute */
- cin++;
- if (*cin != ':')
- return(3);
- cin++;
- if (!isdigit(*cin))
- return(3);
- else
- *min = 10 * (*cin - '0');
- cin++;
- if (!isdigit(*cin))
- return(3);
- else
- *min += (*cin - '0');
-
- /* ignore second (but check format) */
- cin++;
- if (*cin != ':')
- return(4);
- cin++;
- if (!isdigit(*cin))
- return(4);
- cin++;
- if (!isdigit(*cin))
- return(4);
- cin++;
- if (*cin != ' ')
- return(4);
-
- /* read year */
- cin++;
- if (!isdigit(*cin))
- return(4);
- else
- *year = 1000 * (*cin - '0');
- cin++;
- if (!isdigit(*cin))
- return(4);
- else
- *year += 100 * (*cin - '0');
- cin++;
- if (!isdigit(*cin))
- return(4);
- else
- *year += 10 * (*cin - '0');
- cin++;
- if (!isdigit(*cin))
- return(4);
- else
- *year += (*cin - '0');
- return(5);
- }
-
- int sscanf_common(char *inputline, char hostn[MAXSTRINGLENGTH], int *date,
- int *monthno, int *year, int *hr, int *min,
- char filename[MAXSTRINGLENGTH],
- char referer[MAXSTRINGLENGTH], char agent[MAXSTRINGLENGTH],
- int *code, char bytestr[16], size_t preflength)
- { /* scanning 'common' format logfile entries */
- extern flag included(); /* in alias.c */
-
- extern flag bq, Bq, fq;
- extern struct include *noexpandhead;
-
- register char *cin = inputline; /* the character we are reading */
- register char *cout; /* where we are putting it */
- int i;
-
- /* read in hostname */
- i = 0;
- for (cout = hostn; *cin != ' ' && *cin != '\0' && i < MAXSTRINGLENGTH - 1;
- cin++) {
- *cout = *cin;
- cout++;
- i++;
- }
- if (*cin != ' ')
- return(0);
- *cout = '\0';
-
- /* scan until next '[' */
- for (cin++; *cin != '[' && *cin != '\0'; cin++)
- ;
- if (*cin == '\0')
- return(1);
-
- /* read in date */
- cin++;
- if (sscanf_date(cin, date, monthno, year, hr, min) < 5)
- return(1);
- else
- cin += 20;
-
- /* ignore timezone; so scan to next '"' */
- for ( ; *cin != '"' && *cin != '\0'; cin++)
- ;
- if (*cin == '\0')
- return(6);
-
- /* ignore method; so read to next ' ' */
- for (cin++; *cin != ' ' && *cin != '\0'; cin++)
- ;
- if (*cin == '\0')
- return(6);
-
- /* read in filename */
- cin++;
- i = 0;
- for (cout = filename; *cin != ' ' && *cin != '\0' && *cin != '"' &&
- *cin != '?' && i < MAXSTRINGLENGTH - 1 - preflength; cin++) {
- *cout = *cin;
- cout++;
- i++;
- }
- *cout = '\0';
- if (*cin == '?' && !included(filename, noexpandhead)) { /* read in args */
- for ( ; *cin != ' ' && *cin != '\0' && *cin != '"' &&
- i < MAXSTRINGLENGTH - 1 - preflength; cin++) {
- *cout = *cin;
- cout++;
- i++;
- }
- *cout = '\0';
- }
- if (*cin != ' ' && *cin != '"' && *cin != '?')
- return(6);
-
- /* scan to next " */
- for ( ; *cin != '"' && *cin != '\0' ; cin++)
- ;
- if (*cin == '\0')
- return(7);
-
- /* read in return code; always 3 digits, or a - (successes; call them 299) */
- cin++;
- if (*cin != ' ')
- return(7);
- cin++;
- if (!isdigit(*cin))
- if (*cin == '-' && *(cin + 1) == ' ')
- *code = 299;
- else
- return(7);
- else {
- *code = 100 * (*cin - '0');
- cin++;
- if (!isdigit(*cin))
- return(7);
- else
- *code += 10 * (*cin - '0');
- cin++;
- if (!isdigit(*cin))
- return(7);
- else
- *code += (*cin - '0');
- }
-
- /* read in bytestr */
- cin++;
- if (*cin != ' ')
- return (8);
- cin++;
- i = 0;
- for (cout = bytestr; *cin != ' ' && *cin != '\n' && *cin != '\0' && i < 16;
- cin++) {
- *cout = *cin;
- cout++;
- }
- *cout = '\0';
-
- /* Finally, try and read in referer and agent of NCSA combined format */
- if (*cin != ' ' || (!fq && !bq && !Bq))
- return(9);
- if (*(++cin) != '"')
- return(9);
- i = 0;
- cin++;
- for (cout = referer; *cin != '\0' && *cin != '"' && i < MAXSTRINGLENGTH - 1;
- cin++) {
- *cout = *cin;
- cout++;
- i++;
- }
- *cout = '\0';
- if (*cin != '"')
- return(9);
- if (*(++cin) != ' ')
- return(10);
- if (*(++cin) != '"')
- return(10);
- i = 0;
- cin++;
- for (cout = agent; *cin != '\0' && *cin != '"' && i < MAXSTRINGLENGTH - 1;
- cin++) {
- *cout = *cin;
- cout++;
- i++;
- }
- *cout = '\0';
- if (*cin != '"')
- return(10);
- else
- return(11);
-
- }
-
- int sscanf_ncsaold(char *inputline, char hostn[MAXSTRINGLENGTH], int *monthno,
- int *date, int *hr, int *min, int *year,
- char filename[MAXSTRINGLENGTH], size_t preflength)
- { /* scanning NCSA old-style logfile entries */
- extern flag included(); /* in alias.c */
-
- extern struct include *noexpandhead;
-
- register char *cin = inputline; /* the character we are reading */
- register char *cout; /* where we are putting it */
- int i;
-
- /* read in hostname */
- i = 0;
- for (cout = hostn; *cin != ' ' && *cin != '\0' && i < MAXSTRINGLENGTH - 1;
- cin++) {
- *cout = *cin;
- cout++;
- i++;
- }
- if (*cin != ' ')
- return(0);
- *cout = '\0';
-
- /* scan until next '[' */
- for (cin++; *cin != '[' && *cin != '\0'; cin++)
- ;
- if (*cin == '\0')
- return(1);
-
- /* read in date */
- cin++;
- if (sscanf_olddate(cin, date, monthno, year, hr, min) < 5)
- return(1);
- else
- cin += 24;
-
- /* ignore method, so skip to second space */
- for ( ; *cin != ' ' && *cin != '\0'; cin++)
- ;
- if (*cin == '\0')
- return(6);
-
- for (cin++; *cin != ' ' && *cin != '\0'; cin++)
- ;
- if (*cin == '\0')
- return(6);
-
- /* finally, read in the filename */
- cin++;
- i = 0;
- for (cout = filename; *cin != ' ' && *cin != '\n' && *cin != '?' &&
- *cin != '\0' && i < MAXSTRINGLENGTH - 1 - preflength; cin++) {
- *cout = *cin;
- cout++;
- i++;
- }
- *cout = '\0';
- if (*cin == '?' && !included(filename, noexpandhead)) { /* read in args */
- for ( ; *cin != ' ' && *cin != '\0' && *cin != '"' &&
- i < MAXSTRINGLENGTH - 1 - preflength; cin++) {
- *cout = *cin;
- cout++;
- i++;
- }
- *cout = '\0';
- }
- return (7);
-
- }
-
- int sscanf_domains(char *inputline, char string1[MAXSTRINGLENGTH],
- char string2[MAXSTRINGLENGTH])
- { /* scanning the domains file */
- register char *cin = inputline;
- register char *cout;
- int i;
-
- /* run past any white space */
- while (*cin == ' ' || *cin == '\t')
- cin++;
-
- /* if no strings on this line, return 0 */
- if (*cin == '#' || *cin == '\n' || *cin == '\0')
- return(0);
-
- /* otherwise fill up string 1; coerce domains to lower case */
- i = 0;
- for (cout = string1; *cin != ' ' && *cin != '\t' && *cin != '#' &&
- *cin != '\0' && *cin != '\n' && i < MAXSTRINGLENGTH - 1; cin++) {
- *cout = tolower(*cin);
- cout++;
- i++;
- }
-
- /* is that the end of the line (maybe after some white space)? */
- if (*cin == '#' || *cin == '\0' || *cin == '\n' || i == MAXSTRINGLENGTH - 1)
- return(1);
-
- *cout = '\0';
- cin++;
-
- while (*cin == ' ' || *cin == '\t')
- cin++;
-
- if (*cin == '#' || *cin == '\n' || *cin == '\0')
- return(1);
-
- /* otherwise fill up string 2 */
- for (cout = string2; *cin != '#' && *cin != '\n' && *cin != '\0' &&
- i < MAXSTRINGLENGTH - 1; cin++) {
- *cout = *cin;
- cout++;
- i++;
- }
-
- *cout = '\0';
- return(2);
-
- }
-
- int sscanf_config(char *inputline, char string1[MAXSTRINGLENGTH],
- char string2[MAXSTRINGLENGTH],
- char string3[MAXSTRINGLENGTH])
- { /* scanning the config file */
- register char *cin = inputline;
- register char *cout;
- int i;
-
- /* run past any white space */
- while (*cin == ' ' || *cin == '\t')
- cin++;
-
- /* if no strings on this line, return 0 */
- if (*cin == '#' || *cin == '\n' || *cin == '\0')
- return(0);
-
- /* otherwise fill up string 1; convert arguments to upper case */
- i = 0;
- for (cout = string1; *cin != ' ' && *cin != '\t' && *cin != '#' &&
- *cin != '\0' && *cin != '\n' && i < MAXSTRINGLENGTH - 1; cin++) {
- *cout = *cin;
- cout++;
- i++;
- }
-
- *cout = '\0';
-
- /* is that the end of the line (maybe after some white space)? */
- if (*cin == '#' || *cin == '\0' || *cin == '\n' || i == MAXSTRINGLENGTH - 1)
- return(1);
-
- cin++;
-
- while (*cin == ' ' || *cin == '\t')
- cin++;
-
- if (*cin == '#' || *cin == '\n' || *cin == '\0')
- return(1);
-
- /* if string 2 starts with a quote mark, fill up until the next quote
- mark. Otherwise, just fill until the next space */
-
- if (*cin == '\'') {
- cin++;
- for (cout = string2; *cin != '\n' && *cin != '\0' && *cin != '\'' &&
- i < MAXSTRINGLENGTH - 1; cin++) {
- *cout = *cin;
- cout++;
- i++;
- }
- }
- else if (*cin == '"') {
- cin++;
- for (cout = string2; *cin != '\n' && *cin != '\0' && *cin != '"' &&
- i < MAXSTRINGLENGTH - 1; cin++) {
- *cout = *cin;
- cout++;
- i++;
- }
- }
- else {
- for (cout = string2; *cin != '#' && *cin != '\n' && *cin != '\0' &&
- *cin != ' ' && *cin != '\t' && i < MAXSTRINGLENGTH - 1; cin++) {
- *cout = *cin;
- cout++;
- i++;
- }
- }
-
- *cout = '\0';
-
- /* is that the end of the line (maybe after some white space)? */
- if (*cin == '#' || *cin == '\0' || *cin == '\n' || i == MAXSTRINGLENGTH - 1)
- return(2);
-
- cin++;
-
- while (*cin == ' ' || *cin == '\t')
- cin++;
-
- if (*cin == '#' || *cin == '\n' || *cin == '\0')
- return(2);
-
- /* otherwise fill up string 3 */
- if (*cin == '\'') {
- cin++;
- for (cout = string3; *cin != '\n' && *cin != '\0' && *cin != '\'' &&
- i < MAXSTRINGLENGTH - 1; cin++) {
- *cout = *cin;
- cout++;
- i++;
- }
- }
- else if (*cin == '"') {
- cin++;
- for (cout = string3; *cin != '\n' && *cin != '\0' && *cin != '"' &&
- i < MAXSTRINGLENGTH - 1; cin++) {
- *cout = *cin;
- cout++;
- i++;
- }
- }
- else {
- for (cout = string3; *cin != '#' && *cin != '\n' && *cin != '\0' &&
- *cin != ' ' && *cin != '\t' && i < MAXSTRINGLENGTH - 1; cin++) {
- *cout = *cin;
- cout++;
- i++;
- }
- }
-
- *cout = '\0';
-
- /* is that the end of the line (maybe after some white space)? */
- if (*cin == '#' || *cin == '\0' || *cin == '\n' || i == MAXSTRINGLENGTH - 1)
- return(3);
-
- cin++;
-
- while (*cin == ' ' || *cin == '\t')
- cin++;
-
- if (*cin == '#' || *cin == '\n' || *cin == '\0')
- return(3);
-
- return(4); /* we don't ever want to read a fourth string; just know if
- there is one for error checking */
-
- }
-
-
- int sscanf_referer(char *inputline, int *date, int *monthno, int *year,
- int *hr, int *min, char from[MAXSTRINGLENGTH],
- char to[MAXSTRINGLENGTH])
- { /* scanning the referer log */
- /* The format is "[date] from -> to". The [date] is optional. */
-
- extern flag included(); /* in alias.c */
-
- extern struct include *refexpandhead, *noexpandhead;
-
- register char *cin = inputline;
- register char *cout;
- int i;
-
- /* scan the date */
- if (*cin == '[') {
- cin++;
- if (sscanf_date(cin, date, monthno, year, hr, min) < 5)
- return(0);
- else
- cin += 20;
- if (*cin != ']')
- return(5);
- if (*(++cin) != ' ')
- return(5);
- cin++;
- }
- else
- *date = 0; /* as marker */
-
- /* now fill up the from string */
-
- i = 0;
- for (cout = from; *cin != ' ' && *cin != '\0' && *cin != '#' &&
- *cin != '?' && i < MAXSTRINGLENGTH - 1; cin++) {
- *cout = *cin;
- cout++;
- i++;
- }
- *cout = '\0';
- if (*cin == '?' && included(from, refexpandhead)) { /* read in args */
- for ( ; *cin != ' ' && *cin != '\0' && *cin != '#' &&
- i < MAXSTRINGLENGTH - 1; cin++) {
- *cout = *cin;
- cout++;
- i++;
- }
- *cout = '\0';
- }
-
- /* check at this point that the line syntax is ok */
-
- if (*cin == '#' || *cin == '?') {
- while (*cin != ' ' && *cin != '\0')
- cin++;
- }
-
- if (*cin != ' ')
- return(5);
- cin++;
- if (*cin != '-')
- return(6);
- cin++;
- if (*cin != '>')
- return(6);
- cin++;
- if (*cin != ' ')
- return(6);
- cin++;
-
- /* and the to string */
-
- i = 0;
- for (cout = to; *cin != ' ' && *cin != '\0' && *cin != '\n'
- && i < MAXSTRINGLENGTH - 1; cin++) {
- *cout = *cin;
- cout++;
- i++;
- }
- *cout = '\0';
- if (*cin == '?' && !included(to, noexpandhead)) { /* read in args */
- for ( ; *cin != ' ' && *cin != '\0' && i < MAXSTRINGLENGTH - 1; cin++) {
- *cout = *cin;
- cout++;
- i++;
- }
- *cout = '\0';
- }
-
- if (*cin != '\n')
- return(6);
- else
- return(7);
-
- }
-